####* geocoding zipcodes in the dataset and finding the relevant VMW firm facilities


############## zipcodes #################################


client_temp <- read.csv("data/credit_burea/Client_Temp.csv") %>% clean_names() %>% 
  filter(clt_legalcountry == "United States")

### firms with available address information 
client_temp %>% nrow()
client_temp <- client_temp %>% filter(!is.na(clt_legalzip))
client_temp %>% nrow()

zipcodes <- client_temp$clt_legalzip %>% as.character() %>%  unique()
zipcodes <- geocode_zip(zipcodes) %>% full_join(data.frame(zipcode = zipcodes))

zipcodes <- read.csv(paste0(project_dir,"data/store_locations/ZIP_Code_Population_Weighted_Centroids.csv")) %>% 
  select(zipcode = STD_ZIP5,  LAT, LGT) %>% 
  mutate(zipcode = str_pad(zipcode, 5, side = 'left', pad = '0')) %>% 
  right_join(zipcodes) %>% 
  mutate(lat = ifelse(is.na(LAT), lat, LAT),
         long = ifelse(is.na(LGT), lng, LGT)) %>% 
  select(zipcode, lat, long)

write_rds(zipcodes, "data/tmp_data/zipcodes_centroids.rds")              

client_temp <- full_join(client_temp, zipcodes %>% select(clt_legalzip = zipcode,
                                                          lat_centroid = lat,
                                                          long_centroid = long))
write.csv(client_temp, "data/credit_bureau/client_temp_centroids added.csv")
#########################################


########## VMW with trt_exp == 23 :facilities geolocation and saving them ###################
trt_23 <- read_xlsx("data/store_locations/trt_23 Distribution Network V10.0 2022-06-21 - Short.xlsx",
                 sheet = "1")
fields <- as.character(trt_23[4,])
trt_23 <- trt_23[6:nrow(trt_23), c(1:6,11:21,33)]
colnames(trt_23) <- c("serial", "country", "type", "existing", "open", "codename",
                   "address", "city", "state", "zipcode", "latitude", "longitude", 
                   "nearest_metro_market", "year_opened", "month_opened", "date_opened",
                   "date_closed", "permanent_staff")
date_with_text <- function(x, origin = "1899-12-30"){
  num <- suppressWarnings(as.numeric(x))
  y <- as.Date(num, origin = origin)
  if(anyNA(num)){
    y <- as.character(y)
    y[is.na(num)] <- as.character(x[is.na(num)])
  }
  y
}

trt_23 <- trt_23 %>% filter(country == "USA")
nrow(trt_23)
trt_23 <- trt_23 %>% mutate(date_closed = date_with_text(date_closed),
                      date_opened = date_with_text(date_opened)) %>% 
  filter(!is.na(year_opened) & !is.na(month_opened))
nrow(trt_23)
trt_23 <- trt_23 %>% filter(!is.na(longitude))
nrow(trt_23)

trt_23 <- trt_23 %>%
  mutate(date_opened = case_when(
    !is.na(date_opened) & !str_detect(as.character(date_opened),"/") ~ date_opened,
    month_opened %in% month.name ~ 
      paste0(year_opened,"-",
             match(month_opened, month.name),
             "-", "01"),
    month_opened %in% c("Q1","Q2","Q3","Q4") ~
      paste0(year_opened,"-",
             (match(month_opened, c("Q1","Q2","Q3","Q4"))*3-2),
             "-", "01"),
    month_opened %in% c("Autumn", "Fall") ~ 
      paste0(year_opened, "-", "09", "-", "01"),
    T ~ "NA") %>% as.Date(),
    permanent_staff = as.numeric(permanent_staff)) 
write_rds(trt_23, "data/tmp_data/trt_23.rds")

############################################################


trt_19 <- read_excel("data/store_locations/trt_19 Distribution Network 2024 November.xlsx",
                     sheet = "2")

trt_3 = read_xlsx("data/store_locations/trt_3 Distribution Network 2024 November.xlsx", sheet = "2")


########## trt_19 facilities geolocation and saving them ###################

fields_trt_19 <- as.character(trt_19[4,])
## columns to keep: 1, 2, 3, 5, 6, 8 - 11, 13, 14 16, 17-20, 26

## Address == street 
## storenumber == codename
trt_19 <- trt_19[c(1:3,5:6,8:11,13:14,16:20,26)]
trt_19 <- trt_19[-c(1:8), ]
colnames(trt_19) <- c("serial", "codename", "country", "type", "open", "address", "city", "state", "zipcode", "latitude", "longitude",
                      "nearest_metro_area", "year_opened", "month_opened", "date_opened", "date_closed", "permanent_staff")

date_with_text <- function(x, origin = "1899-12-30"){
  num <- suppressWarnings(as.numeric(x))
  y <- as.Date(num, origin = origin)
  if(anyNA(num)){
    y <- as.character(y)
    y[is.na(num)] <- as.character(x[is.na(num)])
  }
  y
}

trt_19 <- trt_19 %>% filter(country == "USA")
nrow(trt_19)
trt_19 <- trt_19 %>% mutate(date_closed = date_with_text(date_closed),
                            date_opened = date_with_text(date_opened)) %>% 
  filter(!is.na(year_opened) & !is.na(month_opened))
nrow(trt_19)
trt_19 <- trt_19 %>% filter(!is.na(longitude))
nrow(trt_19)

trt_19 <- trt_19 %>%
  mutate(date_opened = case_when(
    !is.na(date_opened) & !str_detect(as.character(date_opened),"/") ~ date_opened,
    month_opened %in% month.name ~ 
      paste0(year_opened,"-",
             match(month_opened, month.name),
             "-", "01"),
    month_opened %in% c("Q1","Q2","Q3","Q4") ~
      paste0(year_opened,"-",
             (match(month_opened, c("Q1","Q2","Q3","Q4"))*3-2),
             "-", "01"),
    month_opened %in% c("Autumn", "Fall") ~ 
      paste0(year_opened, "-", "09", "-", "01"),
    T ~ NA) %>% as.Date(),
    permanent_staff = as.numeric(permanent_staff)) 
write_rds(trt_19, "data/tmp_data/trt_19.rds")


########## trt_3 facilities geolocation and saving them ###################

fields_trt_3 <- as.character(trt_3[5,])


trt_3 <- trt_3[-c(1:13), ]
trt_3 <- trt_3[c(1:3,5:6,8:11,13:14,16:20,25)]
colnames(trt_3) <- c("serial", "codename", "country", "type", "open", "address","city", "state", "zipcode",
                       "latitude", "longitude", "nearest_metro_market", "year_opened", "month_opened", "date_opened",
                       "date_closed", "permanent_staff")

date_with_text <- function(x, origin = "1899-12-30"){
  num <- suppressWarnings(as.numeric(x))
  y <- as.Date(num, origin = origin)
  if(anyNA(num)){
    y <- as.character(y)
    y[is.na(num)] <- as.character(x[is.na(num)])
  }
  y
}

trt_3 <- trt_3 %>% filter(country == "USA")
nrow(trt_3)
trt_3 <- trt_3 %>% mutate(date_closed = date_with_text(date_closed),
                              date_opened = date_with_text(date_opened)) %>% 
  filter(!is.na(year_opened) & !is.na(month_opened))
nrow(trt_3)
trt_3 <- trt_3 %>% filter(!is.na(longitude))
nrow(trt_3)


trt_3 <- trt_3 %>%
  mutate(date_opened = case_when(
    !is.na(date_opened) & !str_detect(as.character(date_opened),"/") ~ as.Date(date_opened),
    month_opened %in% month.name ~ 
      as.Date(paste0(year_opened,"-",
                     sprintf("%02d", match(month_opened, month.name)),
                     "-", "01")),
    TRUE ~ as.Date(NA)
  ))
trt_3$permanent_staff <- as.numeric(trt_3$permanent_staff)
write_rds(trt_3, "data/tmp_data/trt_3.rds")




